EVD Simulated Outbreak
Data Cleaning
evd_linelist_raw <- here::here("data", "phm_evd_linelist_2017-10-27.xlsx") %>%
rio::import()
evd_contacts_raw <- here::here("data", "phm_evd_contacts_2017-10-27.xlsx") %>%
rio::import()
cleaning_rules <- here::here("data", "phm_evd_cleaning_rules.xlsx") %>%
rio::import()
evd_linelist_raw
## data.frame [12, 6]
## id chr 39e9dc 664549 B4D8AA 51883d 947e40 9aa197
## Date of Onset chr 43018 43024 43025 “18// 10/2017” 43028 43028
## Date.Report. chr 43030 43032 “23/10/2017” 22-10-2017 “2017/10/25” “2017-10-23”
## SEX. chr Female Male male male f f
## Âge dbl 62 28 54 57 23 66
## location chr Pseudopolis peudopolis Ankh-Morpork PSEUDOPOLIS Ankh Morpork AM
evd_contacts_raw
## data.frame [11, 2]
## Source Case #ID chr 51883d b4d8aa 39e9dc 39E9DC 51883d 39e9dc
## case.id chr 185911 e4b0a2 b4d8aa 601d2e 9AA197 51883d
cleaning_rules
## data.frame [6, 3]
## bad_spelling chr f m .missing am peudopolis .missing
## good_spelling chr female male unknown ankh_morpork pseudopolis unknown
## variable chr sex sex sex location location location
evd_linelist <- evd_linelist_raw %>%
linelist::clean_data(wordlist = cleaning_rules) %>%
mutate(across(contains("date"), guess_dates))
evd_linelist
## data.frame [12, 6]
## id chr 39e9dc 664549 b4d8aa 51883d 947e40 9aa197
## date_of_onset date 2017-10-10 2017-10-16 2017-10-17 2017-10-18 2017-10-20 2017-10-20
## date_report date 2017-10-22 2017-10-24 2017-10-23 2017-10-22 2017-10-25 2017-10-23
## sex chr female male male male female female
## age dbl 62 28 54 57 23 66
## location chr pseudopolis pseudopolis ankh_morpork pseudopolis ankh_morpork ankh_morpork
evd_contacts <- evd_contacts_raw %>%
linelist::clean_data()
evd_contacts
## data.frame [11, 2]
## source_case_id chr 51883d b4d8aa 39e9dc 39e9dc 51883d 39e9dc
## case_id chr 185911 e4b0a2 b4d8aa 601d2e 9aa197 51883d
Examine transmission chains
evd_chains <- make_epicontacts(
linelist = evd_linelist,
contacts = evd_contacts,
id = "id",
from = "source_case_id",
to = "case_id",
directed = TRUE
)
plot(
evd_chains,
node_shape = "sex",
node_color = "location",
shapes = c(male = "male", female = "female", unknown = "question-circle")
)
Examining incidence curves
evd_incidence <- incidence::incidence(
evd_linelist$date_of_onset,
last_date = "2017-10-27"
)
plot(evd_incidence, show_cases = TRUE)
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please use `guide = "none"` instead.

Estimating the growth rate (r)
evd_growth <- evd_incidence %>%
incidence::fit()
## Warning in incidence::fit(.): 10 dates with incidence of 0 ignored for fitting
plot(evd_incidence, show_cases = TRUE, fit = evd_growth)
## the argument `show_cases` requires the argument `stack = TRUE`
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please use `guide = "none"` instead.

Estimating the reproduction number (R)
evd_reproduction <- get_R(
evd_incidence,
si_mean = 15.3,
si_sd = 9.3
)
plot(evd_reproduction) +
geom_vline(xintercept = 1, color = "#af3939")

plot(evd_reproduction, "lambdas")
## Warning: Removed 1 rows containing missing values (position_stack).

Short term forecasting
evd_projection <- project(
evd_incidence,
R = sample_R(evd_reproduction, 5000),
si = evd_reproduction$si,
n_sim = 5000,
n_days = 14,
R_fix_within = TRUE
)
plot(evd_incidence) %>%
add_projections(evd_projection, quantiles = c(0.1, 0.25, 0.5, 0.75, 0.9)) +
scale_x_date() +
labs(
x = "Date of symptom onset"
)
## Scale for 'x' is already present. Adding another scale for 'x', which will replace the existing scale.
## Scale for 'x' is already present. Adding another scale for 'x', which will replace the existing scale.
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please use `guide = "none"` instead.

apply(evd_projection, 1, summary)
## 2017-10-28 2017-10-29 2017-10-30 2017-10-31 2017-11-01 2017-11-02 2017-11-03 2017-11-04 2017-11-05 2017-11-06 2017-11-07 2017-11-08 2017-11-09 2017-11-10
## Min. 0.0000 0.0000 0.0000 0.000 0.0000 0.0000 0.000 0.0000 0.0000 0.0000 0.0000 0.0000 0.000 0.0000
## 1st Qu. 1.0000 1.0000 1.0000 1.000 1.0000 1.0000 2.000 2.0000 2.0000 2.0000 2.0000 3.0000 3.000 3.0000
## Median 2.0000 2.0000 2.0000 2.000 2.0000 3.0000 3.000 3.0000 3.0000 4.0000 4.0000 5.0000 5.000 6.0000
## Mean 1.9966 2.1304 2.2196 2.439 2.6556 2.9562 3.296 3.6848 4.1086 4.6674 5.3146 5.9666 6.811 7.7592
## 3rd Qu. 3.0000 3.0000 3.0000 3.000 4.0000 4.0000 5.000 5.0000 6.0000 6.0000 7.0000 8.0000 9.000 10.0000
## Max. 11.0000 11.0000 11.0000 14.000 16.0000 15.0000 16.000 21.0000 21.0000 28.0000 37.0000 41.0000 53.000 64.0000
apply(evd_projection, 1, function(x) mean(x > 0))
## 2017-10-28 2017-10-29 2017-10-30 2017-10-31 2017-11-01 2017-11-02 2017-11-03 2017-11-04 2017-11-05 2017-11-06 2017-11-07 2017-11-08 2017-11-09 2017-11-10
## 0.8386 0.8624 0.8690 0.8814 0.8976 0.9154 0.9252 0.9340 0.9390 0.9590 0.9656 0.9620 0.9742 0.9778
UK COVID-19
Data Cleaning
covid_eng <- here::here("data", "covid_admissions_uk_2020_10_24.xlsx") %>%
rio::import() %>%
tibble() %>%
mutate(date = lubridate::as_date(date))
covid_eng
## tibble [31614, 5]
## date date 2020-03-20 2020-03-20 2020-03-20 2020-03-20 2020-03-20 2020-03-20
## region chr East of England East of England East of England East of England East of England East ~
## org_name chr Southend University Hospital NHS Foundation Trust Bedford Hospital NHS Trust Luton an~
## org_code chr RAJ RC1 RC9 RCX RD8 RDD
## n dbl 0 2 0 1 0 0
covid_eng %>%
group_by(region) %>%
summarise(cases = sum(n))
## tibble [7, 2]
## region chr East of England London Midlands North East and Yorkshire North West South East
## cases dbl 10699 25689 24199 21556 25041 14335
covid_eng %>%
group_by(region, org_code) %>%
summarise(cases = sum(n)) %>%
ggplot(aes(x = cases)) +
geom_histogram() +
facet_wrap(region ~.) +
labs(
title = "COVID-19 admissions by NHS region",
x = "Number of COVID-19 cases in a single trust",
y = "Frequency"
)
## `summarise()` has grouped output by 'region'. You can override using the `.groups` argument.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Epidemic curves
covid_incidence <- incidence2::incidence(
covid_eng,
date,
interval = "week",
groups = region,
count = n
)
summary(covid_incidence)
## date range: [2020-W12] to [2020-W43]
## n: 127806
## interval: 1 (Monday) week
## cumulative: FALSE
## timespan: 224 days
##
## 1 grouped variable
##
## region n
## <chr> <dbl>
## 1 East of England 10699
## 2 London 25689
## 3 Midlands 24199
## 4 North East and Yorkshire 21556
## 5 North West 25041
## 6 South East 14335
## 7 South West 6287
covid_incidence %>%
plot(fill = region, col_pal = muted, angle = 45, legend = "bottom") +
labs(title = "Weekly incidence of COVID-19 hospital admissions in England")

covid_incidence %>%
facet_plot(col_pal = muted, angle = 45, date_format = "%d%m%Y") +
labs(
title = "Weekly incidence of COVID-19 hospital admissions in England by region"
)

Growth rates
covid_incidence_recent <- incidence2::incidence(
covid_eng,
date,
groups = region,
count = n
) %>%
keep_last(4 * 7) %>%
keep_first(3 * 7)
summary(covid_incidence_recent)
## date range: [2020-09-27] to [2020-10-17]
## n: 9788
## interval: 1 day
## cumulative: FALSE
## timespan: 21 days
##
## 1 grouped variable
##
## region n
## <chr> <dbl>
## 1 East of England 405
## 2 London 931
## 3 Midlands 1688
## 4 North East and Yorkshire 2617
## 5 North West 3224
## 6 South East 534
## 7 South West 389
facet_plot(covid_incidence_recent)

covid_negbin_fit <- covid_incidence_recent %>%
i2extras::fit_curve(model = "negbin")
plot(covid_negbin_fit)

covid_negbin_growth <- covid_negbin_fit %>%
i2extras::growth_rate(growth_decay_time = TRUE)
covid_negbin_growth
## tibble [6, 10]
## count_variable chr n n n n n n
## region chr East of England Midlands North East and Yorkshire North West South East South W~
## model lst negbin [29, 1] negbin [29, 1] negbin [29, 1] negbin [29, 1] negbin [29, 1] negb~
## r dbl 0.048889 0.053999 0.049356 0.061524 0.072597 0.097432
## r_lower dbl 0.031449 0.042792 0.042414 0.051771 0.055169 0.07642
## r_upper dbl 0.06654 0.065281 0.056332 0.071329 0.090316 0.118985
## growth_or_decay chr doubling doubling doubling doubling doubling doubling
## time dbl 14.178045 12.836348 14.043732 11.266349 9.54785 7.114162
## time_lower dbl 10.416959 10.617951 12.30475 9.717624 7.674658 5.825487
## time_upper dbl 22.040373 16.197916 16.342247 13.388677 12.564161 9.0702
min_date <- min(covid_incidence_recent$date_index)
max_date <- max(covid_incidence_recent$date_index)
# covid_incidence_recent %>%
# incidence2::get_dates() %>%
# min()
covid_negbin_growth %>%
ggplot(aes(x = r, y = fct_reorder(region, r))) +
geom_errorbar(aes(xmin = r_lower, xmax = r_upper), width = 0.2) +
geom_point(color = "#7c1073", size = 3) +
geom_vline(aes(xintercept = 0), linetype = 2, color = "#3b3b3b") +
labs(
title = "Estimated daily growth rate of COVID-19 in England by region",
subtitle = glue::glue(
"based on hospital admissions, {min_date} - {max_date}"
)
)

covid_negbin_growth %>%
ggplot(aes(x = time, y = fct_reorder(region, time))) +
geom_errorbar(aes(xmin = time_lower, xmax = time_upper), width = 0.2) +
geom_point(color = "#7c1073", size = 3) +
geom_vline(aes(xintercept = 0), linetype = 2, color = "#3b3b3b") +
labs(
title = "Estimated doubling time of COVID-19 in England by region",
subtitle = glue::glue(
"based on hospital admissions, {min_date} - {max_date}"
)
)
